From 90084c5e4d3dfc43a414f3028c657f90bacb23f2 Mon Sep 17 00:00:00 2001
From: Daniel Sabo <DanielSabo@gmail.com>
Date: Sat, 13 Jul 2013 23:42:08 -0700
Subject: [PATCH] Increase the precision of ref gamma conversions

Use a 3 round Newtons method for the fast version. This should be
fully accurate for floats in [0, 1], less so for very small values
but they're in the the linear part of the sRGB curve.

For the reference conversions the standard C pow() function is
used, which give accuracy far in excess of anything we need.
---
 babl/base/pow-24.c      | 29 ++++++++++------------------
 babl/base/util.h        | 21 ++++++++++++---------
 extensions/float.c      | 42 ++++++++++++++++++++---------------------
 extensions/sse2-float.c |  8 ++++----
 4 files changed, 47 insertions(+), 53 deletions(-)

diff --git a/babl/base/pow-24.c b/babl/base/pow-24.c
index 401a102..a3bb36e 100644
--- a/babl/base/pow-24.c
+++ b/babl/base/pow-24.c
@@ -44,39 +44,30 @@ init_newton (double x, double exponent, double c0, double c1, double c2)
 }
 
 /* Returns x^2.4 == (x*(x^(-1/5)))^3, using Newton's method for x^(-1/5).
- * Max absolute error is 4e-8.
- * One more iteration of Newton would bring error down to 4e-15.
- *
- * (The above measurements apply to the input range can that be involved in
- * gamma correction. Accuracy for inputs very close to 0 is somewhat worse,
- * but that will hit the linear part of the gamma curve rather than call these.
- * Out-of-range pixels >1.3 are also somewhat worse.)
  */
 double
 babl_pow_24 (double x)
 {
   double y = init_newton (x, -1./5, 0.9953189663, 0.9594345146, 0.6742970332);
   int i;
-  for (i = 0; i < 2; i++)
+  for (i = 0; i < 3; i++)
     y = (1.+1./5)*y - ((1./5)*x*(y*y))*((y*y)*(y*y));
   x *= y;
   return x*x*x;
 }
 
-/* Returns x^(1/2.4) == x*(x^(-1/12))^7, using Newton's method for x^(-1/12).
- * Max absolute error is 7e-8
- * One more iteration of Newton would bring error down to 4e-14.
+/* Returns x^(1/2.4) == x*((x^(-1/6))^(1/2))^7, using Newton's method for x^(-1/6).
  */
 double
 babl_pow_1_24 (double x)
 {
-  double y = init_newton (x, -1./12, 0.9976740658, 0.9885035151, 0.5907708377);
+  double y = init_newton (x, -1./12, 0.9976800269, 0.9885126933, 0.5908575383);
   int i;
-  for (i = 0; i < 2; i++)
-    {
-      double z = (y*y)*(y*y);
-      z = (y*z)*(z*z);
-      y = (1.+1./12)*y - (1./12)*x*z;
-    }
-  return ((x*y)*(y*y))*((y*y)*(y*y));
+  double z;
+  x = sqrt (x);
+  /* newton's method for x^(-1/6) */
+  z = (1./6.) * x;
+  for (i = 0; i < 3; i++)
+    y = (7./6.) * y - z * ((y*y)*(y*y)*(y*y*y));
+  return x*y;
 }
diff --git a/babl/base/util.h b/babl/base/util.h
index 77344d4..cfb17b0 100644
--- a/babl/base/util.h
+++ b/babl/base/util.h
@@ -59,29 +59,32 @@
 static inline double
 linear_to_gamma_2_2 (double value)
 {
-#if 0
   if (value > 0.003130804954)
     return 1.055 * pow (value, (1.0/2.4)) - 0.055;
   return 12.92 * value;
-#else
-  if (value > 0.003130804954)
-    return 1.055 * babl_pow_1_24 (value) - 0.055;
-  return 12.92 * value;
-#endif
 }
 
 static inline double
 gamma_2_2_to_linear (double value)
 {
-#if 0
   if (value > 0.04045)
     return pow ((value + 0.055) / 1.055, 2.4);
   return value / 12.92;
-#else
+}
+static inline double
+babl_linear_to_gamma_2_2 (double value)
+{
+  if (value > 0.003130804954)
+    return 1.055 * babl_pow_1_24 (value) - 0.055;
+  return 12.92 * value;
+}
+
+static inline double
+babl_gamma_2_2_to_linear (double value)
+{
   if (value > 0.04045)
     return babl_pow_24 ((value + 0.055) / 1.055);
   return value / 12.92;
-#endif
 }
 
 #else
diff --git a/extensions/float.c b/extensions/float.c
index 067d4e9..5cbbeb6 100644
--- a/extensions/float.c
+++ b/extensions/float.c
@@ -41,9 +41,9 @@ conv_rgbaF_linear_rgbAF_gamma (unsigned char *src,
    while (n--)
      {
        float alpha = fsrc[3];
-       *fdst++ = linear_to_gamma_2_2 (*fsrc++) * alpha;
-       *fdst++ = linear_to_gamma_2_2 (*fsrc++) * alpha;
-       *fdst++ = linear_to_gamma_2_2 (*fsrc++) * alpha;
+       *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++) * alpha;
+       *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++) * alpha;
+       *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++) * alpha;
        *fdst++ = *fsrc++;
      }
   return samples;
@@ -71,17 +71,17 @@ conv_rgbAF_linear_rgbAF_gamma (unsigned char *src,
          }
        else if (alpha >= 1.0)
          {
-           *fdst++ = linear_to_gamma_2_2 (*fsrc++);
-           *fdst++ = linear_to_gamma_2_2 (*fsrc++);
-           *fdst++ = linear_to_gamma_2_2 (*fsrc++);
+           *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++);
+           *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++);
+           *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++);
            *fdst++ = *fsrc++;
          }
        else
          {
            float alpha_recip = 1.0 / alpha;
-           *fdst++ = linear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
-           *fdst++ = linear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
-           *fdst++ = linear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
+           *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
+           *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
+           *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
            *fdst++ = *fsrc++;
          }
      }
@@ -99,9 +99,9 @@ conv_rgbaF_linear_rgbaF_gamma (unsigned char *src,
 
    while (n--)
      {
-       *fdst++ = linear_to_gamma_2_2 (*fsrc++);
-       *fdst++ = linear_to_gamma_2_2 (*fsrc++);
-       *fdst++ = linear_to_gamma_2_2 (*fsrc++);
+       *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++);
+       *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++);
+       *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++);
        *fdst++ = *fsrc++;
      }
   return samples;
@@ -118,9 +118,9 @@ conv_rgbF_linear_rgbF_gamma (unsigned char *src,
 
    while (n--)
      {
-       *fdst++ = linear_to_gamma_2_2 (*fsrc++);
-       *fdst++ = linear_to_gamma_2_2 (*fsrc++);
-       *fdst++ = linear_to_gamma_2_2 (*fsrc++);
+       *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++);
+       *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++);
+       *fdst++ = babl_linear_to_gamma_2_2 (*fsrc++);
      }
   return samples;
 }
@@ -137,9 +137,9 @@ conv_rgbaF_gamma_rgbaF_linear (unsigned char *src,
 
    while (n--)
      {
-       *fdst++ = gamma_2_2_to_linear (*fsrc++);
-       *fdst++ = gamma_2_2_to_linear (*fsrc++);
-       *fdst++ = gamma_2_2_to_linear (*fsrc++);
+       *fdst++ = babl_gamma_2_2_to_linear (*fsrc++);
+       *fdst++ = babl_gamma_2_2_to_linear (*fsrc++);
+       *fdst++ = babl_gamma_2_2_to_linear (*fsrc++);
        *fdst++ = *fsrc++;
      }
   return samples;
@@ -156,9 +156,9 @@ conv_rgbF_gamma_rgbF_linear (unsigned char *src,
 
    while (n--)
      {
-       *fdst++ = gamma_2_2_to_linear (*fsrc++);
-       *fdst++ = gamma_2_2_to_linear (*fsrc++);
-       *fdst++ = gamma_2_2_to_linear (*fsrc++);
+       *fdst++ = babl_gamma_2_2_to_linear (*fsrc++);
+       *fdst++ = babl_gamma_2_2_to_linear (*fsrc++);
+       *fdst++ = babl_gamma_2_2_to_linear (*fsrc++);
      }
   return samples;
 }
diff --git a/extensions/sse2-float.c b/extensions/sse2-float.c
index 7536cb6..97b201b 100644
--- a/extensions/sse2-float.c
+++ b/extensions/sse2-float.c
@@ -401,7 +401,7 @@ conv_yaF_linear_yaF_gamma (const float *src, float *dst, long samples)
 
   while (samples--)
     {
-      *dst++ = linear_to_gamma_2_2 (*src++);
+      *dst++ = babl_linear_to_gamma_2_2 (*src++);
       *dst++ = *src++;
     }
 
@@ -439,7 +439,7 @@ conv_yaF_gamma_yaF_linear (const float *src, float *dst, long samples)
 
   while (samples--)
     {
-      *dst++ = gamma_2_2_to_linear (*src++);
+      *dst++ = babl_gamma_2_2_to_linear (*src++);
       *dst++ = *src++;
     }
 
@@ -480,7 +480,7 @@ conv_yF_linear_yF_gamma (const float *src, float *dst, long samples)
 
   while (samples--)
     {
-      *dst++ = linear_to_gamma_2_2 (*src++);
+      *dst++ = babl_linear_to_gamma_2_2 (*src++);
     }
 
   return total;
@@ -520,7 +520,7 @@ conv_yF_gamma_yF_linear (const float *src, float *dst, long samples)
 
   while (samples--)
     {
-      *dst++ = gamma_2_2_to_linear (*src++);
+      *dst++ = babl_gamma_2_2_to_linear (*src++);
     }
 
   return total;
-- 
2.30.2